library(readr)
library(leaps)
library(car)
## Loading required package: carData
library(mosaic)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: lattice
## Loading required package: ggformula
## Loading required package: ggplot2
## Loading required package: ggstance
## 
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
## 
##     geom_errorbarh, GeomErrorbarh
## 
## New to ggformula?  Try the tutorials: 
##  learnr::run_tutorial("introduction", package = "ggformula")
##  learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
## Registered S3 method overwritten by 'mosaic':
##   method                           from   
##   fortify.SpatialPolygonsDataFrame ggplot2
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
## 
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
## 
##     mean
## The following object is masked from 'package:ggplot2':
## 
##     stat
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## The following objects are masked from 'package:car':
## 
##     deltaMethod, logit
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
##     quantile, sd, t.test, var
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
# Read the Ames training data from the local course folder.
AmesTrain17 <- read.csv(
  "~/Documents/(2) Sophomore Year/Second Semester (SPR 20)/STOR 455/AmesTrain17.csv"
)

# Drop the Order column and the listed descriptor columns (presumably the
# non-numeric ones, given the "Num" suffix -- confirm against the data) so the
# remaining columns can be passed to the model search below.
AmesTrainNum <- select(
  AmesTrain17,
  -c(
    Order, LotConfig, HouseStyle, ExteriorQ, ExteriorC, Foundation,
    BasementHt, BasementC, BasementFin, HeatingQC, CentralAir, KitchenQ,
    GarageType, GarageQ, GarageC
  )
)
# Tabulate the result of a subset search.
#
# regout: an object whose summary() has `outmat`, `rsq`, `adjr2` and `cp`
#         components (e.g. the result of leaps::regsubsets()).
#
# Returns the summary's `outmat` as a data frame with three extra columns:
#   Rsq    - R-squared as a percentage, rounded to 2 decimals
#   adjRsq - adjusted R-squared as a percentage, rounded to 2 decimals
#   Cp     - the summary's `cp` values, rounded to 2 decimals
ShowSubsets <- function(regout) {
  fit_summary <- summary(regout)

  subset_table <- as.data.frame(fit_summary$outmat)
  subset_table[["Rsq"]] <- round(fit_summary$rsq * 100, 2)
  subset_table[["adjRsq"]] <- round(fit_summary$adjr2 * 100, 2)
  subset_table[["Cp"]] <- round(fit_summary$cp, 2)

  subset_table
}
# Subset search of Price on every remaining column: keep one model per size
# (nbest = 1), considering sizes up to 30 predictors (nvmax = 30).
bestsubsets <- regsubsets(
  Price ~ .,
  data = AmesTrainNum,
  nbest = 1,
  nvmax = 30
)

# Show which predictors enter at each size, with R-squared, adjusted
# R-squared and Cp alongside.
ShowSubsets(bestsubsets)

PART 1

# Untransformed 15-predictor linear model for Price.
Mod1 <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementUnFinSF + FirstSF + SecondSF +
    Bedroom + TotalRooms + GarageCars + GarageSF + ScreenPorchSF,
  data = AmesTrainNum
)

# Coefficient table and overall fit statistics.
summary(Mod1)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementUnFinSF + 
##     FirstSF + SecondSF + Bedroom + TotalRooms + GarageCars + 
##     GarageSF + ScreenPorchSF, data = AmesTrainNum)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -125.935  -16.386   -1.499   14.099  187.246 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.300e+03  1.481e+02  -8.773  < 2e-16 ***
## LotFrontage      1.323e-01  3.706e-02   3.570 0.000387 ***
## LotArea          6.973e-04  1.074e-04   6.493 1.80e-10 ***
## Quality          1.619e+01  1.457e+00  11.110  < 2e-16 ***
## Condition        6.311e+00  1.321e+00   4.779 2.24e-06 ***
## YearBuilt        4.735e-01  6.956e-02   6.806 2.49e-11 ***
## YearRemodel      1.324e-01  8.318e-02   1.592 0.111879    
## BasementFinSF    4.097e-02  4.990e-03   8.211 1.41e-15 ***
## BasementUnFinSF  2.108e-02  4.612e-03   4.571 5.94e-06 ***
## FirstSF          6.757e-02  6.526e-03  10.354  < 2e-16 ***
## SecondSF         5.798e-02  4.926e-03  11.770  < 2e-16 ***
## Bedroom         -1.446e+01  2.212e+00  -6.538 1.36e-10 ***
## TotalRooms       6.166e+00  1.596e+00   3.865 0.000124 ***
## GarageCars      -8.583e+00  3.638e+00  -2.359 0.018646 *  
## GarageSF         5.209e-02  1.259e-02   4.137 4.04e-05 ***
## ScreenPorchSF    8.861e-02  2.180e-02   4.065 5.45e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 28.93 on 584 degrees of freedom
## Multiple R-squared:  0.8713, Adjusted R-squared:  0.868 
## F-statistic: 263.5 on 15 and 584 DF,  p-value: < 2.2e-16
# Variance inflation factor for each Mod1 predictor (vif() comes from the car
# package loaded above); used here to look for collinear predictors.
vif(Mod1)
##     LotFrontage         LotArea         Quality       Condition       YearBuilt 
##        1.069460        1.114267        3.063347        1.569292        3.315401 
##     YearRemodel   BasementFinSF BasementUnFinSF         FirstSF        SecondSF 
##        2.183046        3.148899        3.071878        3.818144        3.367374 
##         Bedroom      TotalRooms      GarageCars        GarageSF   ScreenPorchSF 
##        2.446952        4.271214        5.582115        5.291878        1.059502

PART 2

# Residual analysis for Mod1 ----

# Residuals against fitted values, with a horizontal reference line at zero.
# (The original drew this same plot twice; it is drawn once here.)
plot(
  residuals(Mod1) ~ fitted(Mod1),
  xlab = "Fitted values",
  ylab = "Residuals"
)
abline(h = 0)

# Normality of residuals: histogram, then a normal Q-Q plot with its
# reference line.
histogram(residuals(Mod1), xlab = "Residuals")

qqnorm(residuals(Mod1))
qqline(residuals(Mod1))

# Built-in diagnostic plots for the fitted lm object.
plot(Mod1)

# car::qqPlot of the residuals; it also prints the indices of the most
# extreme points. residuals() is used instead of `Mod1$resid`, which only
# worked through `$` partial matching on the name "residuals".
qqPlot(residuals(Mod1), ylab = "Residuals")

## [1] 222 234

PART 3

# Scatterplots of Price against each (possibly transformed) predictor; one
# panel is drawn per term on the right-hand side.
#
# Quality is squared inside I(): the original `I(Quality)^2` applied the
# formula-level `^` operator, which collapses to plain I(Quality), so the panel
# showed the untransformed predictor and disagreed with the term used in tMod.
plot(
  Price ~ LotFrontage + LotArea + I(Quality^2) + Condition + sqrt(YearBuilt) +
    YearRemodel + BasementFinSF + BasementUnFinSF,
  data = AmesTrainNum
)

plot(
  Price ~ FirstSF + SecondSF + Bedroom + log(TotalRooms) + GarageCars +
    GarageSF + ScreenPorchSF,
  data = AmesTrainNum
)

# Transformed model: Quality squared, square root of YearBuilt, log of
# TotalRooms; every other predictor enters as in Mod1.
tMod <- lm(
  Price ~ LotFrontage + LotArea + I(Quality^2) + Condition + sqrt(YearBuilt) +
    YearRemodel + BasementFinSF + BasementUnFinSF + FirstSF + SecondSF +
    Bedroom + log(TotalRooms) + GarageCars + GarageSF + ScreenPorchSF,
  data = AmesTrainNum
)

# The residuals-vs-fitted plot for tMod is not drawn here: the residual
# analysis section further down draws the identical plot.

# Sanity check: mean residual of each fit. The printed values below are on the
# order of 1e-16, i.e. zero up to floating-point error.
mean(resid(Mod1))
## [1] -4.499237e-16
mean(resid(tMod))
## [1] -1.773755e-16
# Residual analysis for tMod ----

# Residuals against fitted values, with a horizontal reference line at zero.
plot(
  residuals(tMod) ~ fitted(tMod),
  xlab = "Fitted values",
  ylab = "Residuals"
)
abline(h = 0)

# Normality of residuals: histogram, then a normal Q-Q plot with its
# reference line.
histogram(residuals(tMod), xlab = "Residuals")

qqnorm(residuals(tMod))
qqline(residuals(tMod))

# Built-in diagnostic plots for the fitted lm object.
plot(tMod)

# car::qqPlot of the residuals; it also prints the indices of the most
# extreme points. residuals() is used instead of `tMod$resid`, which only
# worked through `$` partial matching on the name "residuals".
qqPlot(residuals(tMod), ylab = "Residuals")

## [1] 222 234
# Alternative transformed model: Quality to the 4th power, square roots of
# FirstSF and TotalRooms, squares of GarageCars and GarageSF; the remaining
# predictors enter untransformed.
oMod <- lm(
  Price ~ LotFrontage + LotArea + I(Quality^4) + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementUnFinSF + sqrt(FirstSF) + SecondSF +
    Bedroom + sqrt(TotalRooms) + I(GarageCars^2) + I(GarageSF^2) +
    ScreenPorchSF,
  data = AmesTrainNum
)

# Residuals against fitted values, with a horizontal reference line at zero.
plot(
  residuals(oMod) ~ fitted(oMod),
  xlab = "Fitted values",
  ylab = "Residuals"
)
abline(h = 0)

# Normality of residuals: histogram, then a normal Q-Q plot with its
# reference line.
histogram(residuals(oMod), xlab = "Residuals")

qqnorm(residuals(oMod))
qqline(residuals(oMod))

# Built-in diagnostic plots for the fitted lm object.
plot(oMod)

# car::qqPlot of the residuals; it also prints the indices of the most
# extreme points. residuals() is used instead of `oMod$resid`, which only
# worked through `$` partial matching on the name "residuals".
qqPlot(residuals(oMod), ylab = "Residuals")

## [1] 222 234

```